# library
library(readr)
library(igraph)
library(rsample)
library(plotly)
amz <- readRDS(file = '../data/amz_igraph.rds')
amz
## IGRAPH bbad5a9 DN-- 398688 3311554 --
## + attr: name (v/c), title (v/c), group (v/c), sub (v/c)
## + edges from bbad5a9 (vertex names):
## [1] 1->2 1->3 1->4 1->155 1->185 1->233 1->234 1->235 1->3943
## [10] 2->1 2->3 2->4 2->6 2->10 2->47 2->54 2->118 3->1
## [19] 3->2 3->4 3->5 3->34 3->44 3->235 3->4954 3->4955 4->5
## [28] 4->6 4->9 4->36 4->44 4->48 4->58 4->106 4->1032 5->6
## [37] 5->44 5->46 5->47 5->48 5->49 5->50 5->51 5->52 5->53
## [46] 6->5 6->9 6->46 6->47 6->48 6->54 6->55 6->56 6->57
## [55] 6->58 7->5 7->6 7->9 7->46 7->47 7->54 7->58 7->108
## [64] 7->1521 7->1522 8->4 8->5 8->36 8->44 8->51 8->59 8->60
## + ... omitted several edges
Methodology: for each subgraph, randomly select one node from music, book, video, DVD, and keep retrieving the nodes that are connected/related to it until reach 200 nodes
# Function to retrieve connected nodes up to a given count
retrieve_connected_nodes <- function(graph, start_node, count = 30) {
# Initialize the list with the start node
nodes_to_explore <- list(start_node)
connected_nodes <- c(start_node)
# Keep a list to avoid revisiting nodes
visited_nodes <- numeric(0)
# Explore the graph until we reach the desired number of nodes
while (length(nodes_to_explore) > 0 && length(connected_nodes) < count) {
current_node <- nodes_to_explore[[1]]
nodes_to_explore <- nodes_to_explore[-1] # Remove the explored node
# Skip if already visited
if (current_node %in% visited_nodes) next
# Mark as visited
visited_nodes <- c(visited_nodes, current_node)
# Get neighbors and add to nodes to explore
neighbors <- neighbors(graph, current_node)
new_neighbors <- neighbors[!neighbors %in% connected_nodes]
nodes_to_explore <- c(nodes_to_explore, as.list(new_neighbors))
connected_nodes <- c(connected_nodes, new_neighbors)
# Limit the collection if it exceeds the desired count
if (length(connected_nodes) > count) {
connected_nodes <- connected_nodes[1:count]
break
}
}
# Return the vertex sequence of connected nodes
return(connected_nodes)
}
# music
set.seed(194)
# randomly select 1 node from music group
music.random = sample(V(amz)[V(amz)$group == "Music"], 1)
# apply function to get 200 related nodes
music.nodes = retrieve_connected_nodes(amz, music.random)
music.network <- induced_subgraph(amz, music.nodes)
# plot
# Choose a layout that spreads out the nodes more effectively
layout <- layout_with_fr(music.network)
# Set graph margins to zero
par(mar = c(0, 0, 2, 0))
# Plot the graph with improved layout and adjusted aesthetics
plot(music.network, layout = layout,
# Vertex properties
vertex.color = "#88398A", # Deep purple color for vertices
vertex.frame.color = "#FFFFFF", # White border for vertices for better visibility
vertex.size = 5, # Smaller vertex size to avoid overlap
vertex.label = V(music.network)$name, # Ensure labels are set to product IDs or similar
vertex.label.dist = 1, # Distance of labels from vertices
vertex.label.cex = 0.8, # Adjust label size for readability
vertex.label.color = "black", # Change label color to black for contrast
vertex.label.font = 2, # Bold labels
# Edge properties
edge.color = "gray50", # Lighter color for edges
edge.width = 0.2, # Thinner edges
edge.arrow.size = 0.1, # Smaller arrows if directed
# General plot settings
main = "Music Products Base Plot", # Add a title if appropriate
bg = "white" # Background color
)
# Color nodes based on a community detection algorithm to show clusters
music.cluster <- cluster_optimal(music.network)
mycomcols <- c("black", "#D3D3D3", "#88398A")
# Plot the graph with advanced layout and adjusted aesthetics
plot(music.network, layout = layout,
# Vertex properties
vertex.color = mycomcols[music.cluster$membership], # Color vertices by community
vertex.frame.color = "#FFFFFF", # White border for vertices for better visibility
vertex.size = sqrt(degree(music.network)) * 2, # Scale size by square root of degree
vertex.label = V(music.network)$name, # Labels are set to product IDs
vertex.label.dist = 1, # Distance of labels from vertices
vertex.label.cex = 0.6, # Adjust label size for readability
vertex.label.color = "black", # Label color for contrast
vertex.label.font = 2, # Bold labels
# Edge properties
edge.color = "gray50", # Lighter color for edges
edge.width = 0.2, # Thinner edges
edge.arrow.size = 0.1, # Smaller arrows if directed
# General plot settings
main = "Music Products Community", # Add a title
bg = "white" # Background color
)
# Get vertex data including the degree for size scaling
vertex_data <- data.frame(
Id = V(music.network)$name,
x = layout[, 1],
y = layout[, 2],
degree = degree(music.network),
Title = V(music.network)$title,
Group = V(music.network)$group,
Category = V(music.network)$sub
)
# Enhance hover info by including all attributes except x, y coordinates
vertex_data$hoverinfo <- apply(vertex_data[, -c(2, 3)], 1, function(row) {
paste(names(row), row, sep=": ", collapse="<br>")
})
# Get edge data
edge_data <- get.data.frame(music.network, what = "edges")
## Warning: `get.data.frame()` was deprecated in igraph 2.0.0.
## ℹ Please use `as_data_frame()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Join edge data with vertex data to get coordinates for 'from' and 'to'
edge_data <- merge(edge_data, vertex_data, by.x = "from", by.y = "Id", all.x = TRUE)
edge_data <- merge(edge_data, vertex_data, by.x = "to", by.y = "Id", all.x = TRUE, suffixes = c(".from", ".to"))
# Prepare data for Plotly plot
edges <- list(
x = c(rbind(edge_data$x.from, edge_data$x.to, NA)),
y = c(rbind(edge_data$y.from, edge_data$y.to, NA)),
type = "scatter",
mode = "lines",
line = list(color = "grey", width = 0.5)
)
nodes <- list(
x = vertex_data$x,
y = vertex_data$y,
hovertext = vertex_data$hoverinfo,
mode = "markers", # Only markers, no text
marker = list(size = vertex_data$degree * 2,
color = mycomcols[music.cluster$membership]),
type = "scatter",
hoverinfo = "text"
)
# Create the plot
plot_ly() %>%
add_trace(x = edges$x, y = edges$y, mode = edges$mode, type = edges$type, line = edges$line) %>%
add_trace(x = nodes$x, y = nodes$y, hovertext = nodes$hovertext, mode = nodes$mode, type = nodes$type, hoverinfo = "text", marker = nodes$marker) %>%
layout(
title = "Network Visualization of Music Products",
xaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
yaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
hovermode = 'closest'
)
print(music.network, full = TRUE)
## IGRAPH 2643976 DN-- 30 206 --
## + attr: name (v/c), title (v/c), group (v/c), sub (v/c)
## + edges (vertex names):
## 219045 -> 239771, 240889, 269387
## 230470 -> 239763, 239764, 239765, 239766, 239767, 239768, 239769,
## 239771, 239772
## 239763 -> 230470, 239764, 239765, 239766, 239768, 239769, 239771,
## 240889, 240890, 240892
## 239764 -> 230470, 239765, 239766, 239771, 240888, 251179, 251180,
## 269387, 269388
## 239765 -> 230470, 239764, 239766, 239769, 239772, 251179, 251180,
## 270419, 287512
## 239766 -> 230470, 239763, 239764, 239765, 239769, 251179, 269388,
## 270419, 287512
## 239767 -> 230470, 239768
## 239768 -> 230470, 239763, 239764, 239766, 239767, 240888, 251177,
## 251179, 270419
## 239769 -> 230470, 239763, 239764, 239765, 239766, 251179, 269388,
## 270419
## 239771 -> 239763, 239764, 240888, 240889, 240890, 240891, 240892,
## 240893, 240894, 240895
## 239772 -> 230470, 239764, 239765, 239766, 251180, 270419, 287512
## 240888 -> 230470, 239764, 239771, 240889, 240890, 240891, 240892,
## 240894, 240899, 304504
## 240889 -> 239771, 240888, 240890, 240891, 240892, 240893, 240899
## 240890 -> 239763, 239771, 240888, 240889, 240891
## 240891 -> 239771, 240888, 240889, 240890, 240892, 240893, 240899
## 240892 -> 239764, 239771, 240888, 240889, 240891, 240899
## 240893 -> 239771, 240889, 240891
## 240894 -> 239764, 239771, 240888, 240889, 240890, 240893, 240899
## 240895 -> 239771, 240893
## 240899 -> 240888, 240889, 240891, 240892, 304504
## 251177 -> 239764, 239765, 239766, 239768, 251179, 269388
## 251179 -> 239764, 239765, 239766, 239769, 240892, 251180, 269387,
## 269388, 270419
## 251180 -> 230470, 239764, 239765, 239766, 239771, 240892, 251179,
## 269387, 269388
## 269387 -> 219045, 239764, 239766, 239771, 251179, 251180, 269388,
## 332118, 332119
## 269388 -> 239764, 239765, 239766, 251177, 251179, 251180, 269387,
## 270419
## 270419 -> 239764, 239765, 239766, 239769, 251179, 269388, 287512
## 287512 -> 239765, 239766, 239772, 240888, 270419
## 304504 -> 239771, 240888, 240889, 240899
## 332118 -> 230470, 239764, 239771, 240889, 251180, 269387, 332119
## 332119 -> 239771, 240889, 251180, 269387, 332118
# book
set.seed(194)
# randomly select 1 node from book group
book.random = sample(V(amz)[V(amz)$group == "Book"], 1)
# apply function to get 200 related nodes
book.nodes = retrieve_connected_nodes(amz, book.random)
book.network <- induced_subgraph(amz, book.nodes)
# plot
# Choose a layout that spreads out the nodes more effectively
layout <- layout_with_fr(book.network)
# Set graph margins to zero
par(mar = c(0, 0, 2, 0))
# Plot the graph with improved layout and adjusted aesthetics
plot(book.network, layout = layout,
# Vertex properties
vertex.color = "#88398A", # Deep purple color for vertices
vertex.frame.color = "#FFFFFF", # White border for vertices for better visibility
vertex.size = 5, # Smaller vertex size to avoid overlap
vertex.label = V(book.network)$name, # Ensure labels are set to product IDs or similar
vertex.label.dist = 1, # Distance of labels from vertices
vertex.label.cex = 0.8, # Adjust label size for readability
vertex.label.color = "black", # Change label color to black for contrast
vertex.label.font = 2, # Bold labels
# Edge properties
edge.color = "gray50", # Lighter color for edges
edge.width = 0.2, # Thinner edges
edge.arrow.size = 0.1, # Smaller arrows if directed
# General plot settings
main = "Book Products Base Plot", # Add a title if appropriate
bg = "white" # Background color
)
# Color nodes based on a community detection algorithm to show clusters
book.cluster <- cluster_optimal(book.network)
mycomcols <- c("black", "#D3D3D3", "#88398A")
# Plot the graph with advanced layout and adjusted aesthetics
plot(book.network, layout = layout,
# Vertex properties
vertex.color = mycomcols[book.cluster$membership], # Color vertices by community
vertex.frame.color = "#FFFFFF", # White border for vertices for better visibility
vertex.size = sqrt(degree(book.network)) * 2, # Scale size by square root of degree
vertex.label = V(book.network)$name, # Labels are set to product IDs
vertex.label.dist = 1, # Distance of labels from vertices
vertex.label.cex = 0.6, # Adjust label size for readability
vertex.label.color = "black", # Label color for contrast
vertex.label.font = 2, # Bold labels
# Edge properties
edge.color = "gray50", # Lighter color for edges
edge.width = 0.2, # Thinner edges
edge.arrow.size = 0.1, # Smaller arrows if directed
# General plot settings
main = "Book Products Community", # Add a title
bg = "white" # Background color
)
# Get vertex data including the degree for size scaling
vertex_data <- data.frame(
Id = V(book.network)$name,
x = layout[, 1],
y = layout[, 2],
degree = degree(book.network),
Title = V(book.network)$title,
Group = V(book.network)$group,
Category = V(book.network)$sub
)
# Enhance hover info by including all attributes except x, y coordinates
vertex_data$hoverinfo <- apply(vertex_data[, -c(2, 3)], 1, function(row) {
paste(names(row), row, sep=": ", collapse="<br>")
})
# Get edge data
edge_data <- get.data.frame(book.network, what = "edges")
# Join edge data with vertex data to get coordinates for 'from' and 'to'
edge_data <- merge(edge_data, vertex_data, by.x = "from", by.y = "Id", all.x = TRUE)
edge_data <- merge(edge_data, vertex_data, by.x = "to", by.y = "Id", all.x = TRUE, suffixes = c(".from", ".to"))
# Prepare data for Plotly plot
edges <- list(
x = c(rbind(edge_data$x.from, edge_data$x.to, NA)),
y = c(rbind(edge_data$y.from, edge_data$y.to, NA)),
type = "scatter",
mode = "lines",
line = list(color = "grey", width = 0.5)
)
nodes <- list(
x = vertex_data$x,
y = vertex_data$y,
hovertext = vertex_data$hoverinfo,
mode = "markers", # Only markers, no text
marker = list(size = vertex_data$degree * 2,
color = mycomcols[book.cluster$membership]),
type = "scatter",
hoverinfo = "text"
)
# Create the plot
plot_ly() %>%
add_trace(x = edges$x, y = edges$y, mode = edges$mode, type = edges$type, line = edges$line) %>%
add_trace(x = nodes$x, y = nodes$y, hovertext = nodes$hovertext, mode = nodes$mode, type = nodes$type, hoverinfo = "text", marker = nodes$marker) %>%
layout(
title = "Network Visualization of Book Products",
xaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
yaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
hovermode = 'closest'
)
# music
set.seed(194)
# randomly select 1 node from video group
video.random = sample(V(amz)[V(amz)$group == "Video"], 1)
# apply function to get 200 related nodes
video.nodes = retrieve_connected_nodes(amz, video.random)
video.network <- induced_subgraph(amz, video.nodes)
# plot
# Choose a layout that spreads out the nodes more effectively
layout <- layout_with_fr(video.network)
# Set graph margins to zero
par(mar = c(0, 0, 2, 0))
# Plot the graph with improved layout and adjusted aesthetics
plot(video.network, layout = layout,
# Vertex properties
vertex.color = "#88398A", # Deep purple color for vertices
vertex.frame.color = "#FFFFFF", # White border for vertices for better visibility
vertex.size = 5, # Smaller vertex size to avoid overlap
vertex.label = V(video.network)$name, # Ensure labels are set to product IDs or similar
vertex.label.dist = 1, # Distance of labels from vertices
vertex.label.cex = 0.8, # Adjust label size for readability
vertex.label.color = "black", # Change label color to black for contrast
vertex.label.font = 2, # Bold labels
# Edge properties
edge.color = "gray50", # Lighter color for edges
edge.width = 0.2, # Thinner edges
edge.arrow.size = 0.1, # Smaller arrows if directed
# General plot settings
main = "Video Products Base Plot", # Add a title if appropriate
bg = "white" # Background color
)
# Color nodes based on a community detection algorithm to show clusters
video.cluster <- cluster_optimal(video.network)
mycomcols <- c("black", "#D3D3D3", "#88398A")
# Plot the graph with advanced layout and adjusted aesthetics
plot(video.network, layout = layout,
# Vertex properties
vertex.color = mycomcols[video.cluster$membership], # Color vertices by community
vertex.frame.color = "#FFFFFF", # White border for vertices for better visibility
vertex.size = sqrt(degree(video.network)) * 2, # Scale size by square root of degree
vertex.label = V(video.network)$name, # Labels are set to product IDs
vertex.label.dist = 1, # Distance of labels from vertices
vertex.label.cex = 0.6, # Adjust label size for readability
vertex.label.color = "black", # Label color for contrast
vertex.label.font = 2, # Bold labels
# Edge properties
edge.color = "gray50", # Lighter color for edges
edge.width = 0.2, # Thinner edges
edge.arrow.size = 0.1, # Smaller arrows if directed
# General plot settings
main = "Video Products Community", # Add a title
bg = "white" # Background color
)
# Get vertex data including the degree for size scaling
vertex_data <- data.frame(
Id = V(video.network)$name,
x = layout[, 1],
y = layout[, 2],
degree = degree(video.network),
Title = V(video.network)$title,
Group = V(video.network)$group,
Category = V(video.network)$sub
)
# Enhance hover info by including all attributes except x, y coordinates
vertex_data$hoverinfo <- apply(vertex_data[, -c(2, 3)], 1, function(row) {
paste(names(row), row, sep=": ", collapse="<br>")
})
# Get edge data
edge_data <- get.data.frame(video.network, what = "edges")
# Join edge data with vertex data to get coordinates for 'from' and 'to'
edge_data <- merge(edge_data, vertex_data, by.x = "from", by.y = "Id", all.x = TRUE)
edge_data <- merge(edge_data, vertex_data, by.x = "to", by.y = "Id", all.x = TRUE, suffixes = c(".from", ".to"))
# Prepare data for Plotly plot
edges <- list(
x = c(rbind(edge_data$x.from, edge_data$x.to, NA)),
y = c(rbind(edge_data$y.from, edge_data$y.to, NA)),
type = "scatter",
mode = "lines",
line = list(color = "grey", width = 0.5)
)
nodes <- list(
x = vertex_data$x,
y = vertex_data$y,
hovertext = vertex_data$hoverinfo,
mode = "markers", # Only markers, no text
marker = list(size = vertex_data$degree * 2,
color = mycomcols[video.cluster$membership]),
type = "scatter",
hoverinfo = "text"
)
# Create the plot
plot_ly() %>%
add_trace(x = edges$x, y = edges$y, mode = edges$mode, type = edges$type, line = edges$line) %>%
add_trace(x = nodes$x, y = nodes$y, hovertext = nodes$hovertext, mode = nodes$mode, type = nodes$type, hoverinfo = "text", marker = nodes$marker) %>%
layout(
title = "Network Visualization of Video Products",
xaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
yaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
hovermode = 'closest'
)
# music
set.seed(194)
# randomly select 1 node from music group
dvd.random = sample(V(amz)[V(amz)$group == "DVD"], 1)
# apply function to get 200 related nodes
dvd.nodes = retrieve_connected_nodes(amz, dvd.random)
dvd.network <- induced_subgraph(amz, dvd.nodes)
# plot
# Choose a layout that spreads out the nodes more effectively
layout <- layout_with_fr(dvd.network)
# Set graph margins to zero
par(mar = c(0, 0, 2, 0))
# Plot the graph with improved layout and adjusted aesthetics
plot(dvd.network, layout = layout,
# Vertex properties
vertex.color = "#88398A", # Deep purple color for vertices
vertex.frame.color = "#FFFFFF", # White border for vertices for better visibility
vertex.size = 5, # Smaller vertex size to avoid overlap
vertex.label = V(dvd.network)$name, # Ensure labels are set to product IDs or similar
vertex.label.dist = 1, # Distance of labels from vertices
vertex.label.cex = 0.8, # Adjust label size for readability
vertex.label.color = "black", # Change label color to black for contrast
vertex.label.font = 2, # Bold labels
# Edge properties
edge.color = "gray50", # Lighter color for edges
edge.width = 0.2, # Thinner edges
edge.arrow.size = 0.1, # Smaller arrows if directed
# General plot settings
main = "DVD Products Base Plot", # Add a title if appropriate
bg = "white" # Background color
)
# Color nodes based on a community detection algorithm to show clusters
dvd.cluster <- cluster_optimal(dvd.network)
mycomcols <- c("black", "#D3D3D3", "#88398A")
# Plot the graph with advanced layout and adjusted aesthetics
plot(dvd.network, layout = layout,
# Vertex properties
vertex.color = mycomcols[dvd.cluster$membership], # Color vertices by community
vertex.frame.color = "#FFFFFF", # White border for vertices for better visibility
vertex.size = sqrt(degree(dvd.network)) * 2, # Scale size by square root of degree
vertex.label = V(dvd.network)$name, # Labels are set to product IDs
vertex.label.dist = 1, # Distance of labels from vertices
vertex.label.cex = 0.6, # Adjust label size for readability
vertex.label.color = "black", # Label color for contrast
vertex.label.font = 2, # Bold labels
# Edge properties
edge.color = "gray50", # Lighter color for edges
edge.width = 0.2, # Thinner edges
edge.arrow.size = 0.1, # Smaller arrows if directed
# General plot settings
main = "DVD Products Community", # Add a title
bg = "white" # Background color
)
# Get vertex data including the degree for size scaling
vertex_data <- data.frame(
Id = V(dvd.network)$name,
x = layout[, 1],
y = layout[, 2],
degree = degree(dvd.network),
Title = V(dvd.network)$title,
Group = V(dvd.network)$group,
Category = V(dvd.network)$sub
)
# Enhance hover info by including all attributes except x, y coordinates
vertex_data$hoverinfo <- apply(vertex_data[, -c(2, 3)], 1, function(row) {
paste(names(row), row, sep=": ", collapse="<br>")
})
# Get edge data
edge_data <- get.data.frame(dvd.network, what = "edges")
# Join edge data with vertex data to get coordinates for 'from' and 'to'
edge_data <- merge(edge_data, vertex_data, by.x = "from", by.y = "Id", all.x = TRUE)
edge_data <- merge(edge_data, vertex_data, by.x = "to", by.y = "Id", all.x = TRUE, suffixes = c(".from", ".to"))
# Prepare data for Plotly plot
edges <- list(
x = c(rbind(edge_data$x.from, edge_data$x.to, NA)),
y = c(rbind(edge_data$y.from, edge_data$y.to, NA)),
type = "scatter",
mode = "lines",
line = list(color = "grey", width = 0.5)
)
nodes <- list(
x = vertex_data$x,
y = vertex_data$y,
hovertext = vertex_data$hoverinfo,
mode = "markers", # Only markers, no text
marker = list(size = vertex_data$degree * 2,
color = mycomcols[dvd.cluster$membership]),
type = "scatter",
hoverinfo = "text"
)
# Create the plot
plot_ly() %>%
add_trace(x = edges$x, y = edges$y, mode = edges$mode, type = edges$type, line = edges$line) %>%
add_trace(x = nodes$x, y = nodes$y, hovertext = nodes$hovertext, mode = nodes$mode, type = nodes$type, hoverinfo = "text", marker = nodes$marker) %>%
layout(
title = "Network Visualization of DVD Products",
xaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
yaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
hovermode = 'closest'
)
Music
# Network Metrics
# Density
network_density <- edge_density(music.network)
cat("Network Density:", network_density, "\n")
## Network Density: 0.2367816
# Average Path Length
avg_path_length <- average.path.length(music.network, directed = FALSE)
## Warning: `average.path.length()` was deprecated in igraph 2.0.0.
## ℹ Please use `mean_distance()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
cat("Average Path Length:", avg_path_length, "\n")
## Average Path Length: 1.889655
# Diameter
network_diameter <- diameter(music.network, directed = FALSE)
cat("Network Diameter:", network_diameter, "\n")
## Network Diameter: 3
# Node Metrics
# Degree
node_degree <- degree(music.network)
cat("Node Degree:\n")
## Node Degree:
print(summary(node_degree))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 3.00 9.00 14.00 13.73 18.50 27.00
# Betweenness Centrality
node_betweenness <- betweenness(music.network)
cat("Node Betweenness Centrality:\n")
## Node Betweenness Centrality:
print(summary(node_betweenness))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 2.440 7.782 33.767 32.210 185.015
# Closeness Centrality
node_closeness <- closeness(music.network)
cat("Node Closeness Centrality:\n")
## Node Closeness Centrality:
print(summary(node_closeness))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.01205 0.01476 0.01639 0.01626 0.01810 0.02041
# Eigenvector Centrality
node_eigenvector <- evcent(music.network)$vector
## Warning: `evcent()` was deprecated in igraph 2.0.0.
## ℹ Please use `eigen_centrality()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
cat("Node Eigenvector Centrality:\n")
## Node Eigenvector Centrality:
print(summary(node_eigenvector))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.09343 0.28684 0.44291 0.46907 0.65328 1.00000
# Edge Metrics
# Edge Betweenness
edge_betweenness <- edge.betweenness(music.network)
## Warning: `edge.betweenness()` was deprecated in igraph 2.0.0.
## ℹ Please use `edge_betweenness()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
cat("Edge Betweenness:\n")
## Edge Betweenness:
print(summary(edge_betweenness))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 3.970 6.071 9.141 10.460 71.000
Network Density: 0.2367816
The density of the network is approximately 0.237, which means that
about 23.7% of all possible edges between nodes are present. This
indicates a moderately connected network, suggesting that a fair number
of music products are frequently co-purchased or associated with each
other, but there is still a significant proportion that are not directly
connected.
Average Path Length: 1.889655
The average number of steps along the shortest paths for all possible
pairs of nodes is about 1.89. This relatively short path length implies
that, on average, any two music products in this network are separated
by less than two steps, indicating that products are closely related and
often co-purchased.
Network Diameter: 3
The diameter of the network is 3, which is the longest shortest path
between any two nodes in the network. This small diameter suggests that
the network is compact, meaning that even the most distantly related
products are not far apart in terms of purchase patterns.
Node Degree
The degree distribution shows that the minimum degree is 3, and the
maximum degree is 27. The median degree is 14, indicating that half of
the nodes have at least 14 connections. The mean degree is about 13.73,
showing a relatively balanced distribution of connections among nodes.
Nodes with higher degrees are likely popular music products frequently
purchased with many others.
Node Betweenness Centrality Betweenness centrality measures the extent to which a node lies on paths between other nodes. A high mean value (33.767) and a maximum value (185.015) indicate that certain nodes play critical roles as bridges or connectors in the network. These nodes are essential for maintaining the network’s overall connectivity and can be key products influencing purchase patterns.
Node Closeness Centrality Closeness centrality measures how close a node is to all other nodes in the network. Nodes with higher closeness centrality (closer to the maximum value of 0.02041) can quickly interact with all other nodes, making them influential in spreading information or trends within the network.
Node Eigenvector Centrality Eigenvector centrality assigns relative scores to all nodes based on the principle that connections to high-scoring nodes contribute more to the score of the node in question. The maximum value of 1.0 and a mean of 0.46907 indicate that some nodes are highly influential, connected to other well-connected nodes, making them central in the network.
Edge Metrics Edge betweenness measures the number of times an edge is part of the shortest path between any two nodes. A mean of 9.141 and a maximum of 71 indicate that some edges are crucial for maintaining the network’s structure. These edges often represent key relationships between products that are essential for the flow of connections within the network.
Summary The metrics indicate that the music products network is moderately dense, compact, and features nodes and edges with varying levels of influence and connectivity. Some nodes and edges are critical for maintaining the network’s overall structure, indicating popular or influential music products and key associations among them.
Book
# Network Metrics
# Density
network_density <- edge_density(book.network)
cat("Network Density:", network_density, "\n")
## Network Density: 0.1367816
# Average Path Length
avg_path_length <- average.path.length(book.network, directed = FALSE)
cat("Average Path Length:", avg_path_length, "\n")
## Average Path Length: 2.604598
# Diameter
network_diameter <- diameter(book.network, directed = FALSE)
cat("Network Diameter:", network_diameter, "\n")
## Network Diameter: 4
# Node Metrics
# Degree
node_degree <- degree(book.network)
cat("Node Degree:\n")
## Node Degree:
print(summary(node_degree))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 5.000 7.000 7.933 10.000 20.000
# Betweenness Centrality
node_betweenness <- betweenness(book.network)
cat("Node Betweenness Centrality:\n")
## Node Betweenness Centrality:
print(summary(node_betweenness))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 0.000 2.000 16.300 8.354 126.500
# Closeness Centrality
node_closeness <- closeness(book.network)
cat("Node Closeness Centrality:\n")
## Node Closeness Centrality:
print(summary(node_closeness))
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.01000 0.03846 0.05882 0.05155 0.06667 0.10000 1
# Eigenvector Centrality
node_eigenvector <- evcent(book.network)$vector
cat("Node Eigenvector Centrality:\n")
## Node Eigenvector Centrality:
print(summary(node_eigenvector))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.007996 0.013327 0.024175 0.234801 0.511089 1.000000
# Edge Metrics
# Edge Betweenness
edge_betweenness <- edge.betweenness(book.network)
cat("Edge Betweenness:\n")
## Edge Betweenness:
print(summary(edge_betweenness))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 1.875 3.333 7.513 9.000 104.500
Network Density: 0.1367816
The density of the network is approximately 0.1367816, which means that
about 13.67% of all possible edges between nodes are present. This
indicates a moderately connected network, suggesting that a fair number
of music products are frequently co-purchased or associated with each
other, but there is still a significant proportion that are not directly
connected.
Average Path Length: 2.604598
The average number of steps along the shortest paths for all possible
pairs of nodes is about 2.6 This relatively short path length implies
that, on average, any two music products in this network are separated
by less than two steps, indicating that products are closely related and
often co-purchased.
Network Diameter: 4 The diameter of the network is 4, which is the longest shortest path between any two nodes in the network. This small diameter suggests that the network is compact, meaning that even the most distantly related products are not far apart in terms of purchase patterns.
Node Degree
he minimum degree indicates the least connected node has only one
connection. The maximum degree indicates the most connected node has 20
connections. The median degree of 7 and the mean degree of approximately
7.93 suggest a moderate level of connectivity across the network, with
most nodes having between 5 and 10 connections.
Node Betweenness Centrality
The minimum betweenness centrality is 0, indicating some nodes are not
on any shortest paths between other nodes. The maximum betweenness
centrality is 126.5, indicating a node that is very central and acts as
a key bridge in the network. The mean betweenness centrality of 16.3 and
a median of 2 suggest that while most nodes have low betweenness, a few
nodes have very high betweenness, acting as important connectors within
the network.
Node Closeness Centrality
The minimum closeness centrality is 0.01, and the maximum is 0.1,
indicating a range of how quickly nodes can reach other nodes. The
median and mean values are around 0.05, suggesting that, on average,
nodes are relatively close to each other. The presence of an NA value
might indicate a disconnected or isolated node.
Node Eigenvector Centrality
The minimum eigenvector centrality is approximately 0.008, and the
maximum is 1.0, indicating that some nodes are much more influential
than others. The mean eigenvector centrality of 0.235 suggests that
there are several influential nodes, but only a few have very high
influence.
Edge Metrics
The minimum edge betweenness is 1, and the maximum is 104.5, suggesting
that some edges are critical for maintaining network connectivity. The
mean edge betweenness of 7.513 indicates that, on average, edges play a
moderate role in connecting different parts of the network. High values
of edge betweenness highlight edges that are crucial for shortest paths
within the network.
Summary
The book network exhibits a moderate level of connectivity, with a
reasonable average path length and a small diameter, indicating a
relatively compact structure. The degree distribution suggests that most
nodes have a moderate number of connections, while the betweenness and
eigenvector centrality metrics highlight the presence of key nodes that
play significant roles in maintaining network connectivity and
influence. The edge betweenness values suggest that certain edges are
crucial for the shortest path connections in the network, emphasizing
the importance of these edges in network dynamics.
Video
# Network Metrics
# Density
network_density <- edge_density(video.network)
cat("Network Density:", network_density, "\n")
## Network Density: 0.1885057
# Average Path Length
avg_path_length <- average.path.length(video.network, directed = FALSE)
cat("Average Path Length:", avg_path_length, "\n")
## Average Path Length: 2.271264
# Diameter
network_diameter <- diameter(video.network, directed = FALSE)
cat("Network Diameter:", network_diameter, "\n")
## Network Diameter: 4
# Node Metrics
# Degree
node_degree <- degree(video.network)
cat("Node Degree:\n")
## Node Degree:
print(summary(node_degree))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 7.25 10.00 10.93 14.75 23.00
# Betweenness Centrality
node_betweenness <- betweenness(video.network)
cat("Node Betweenness Centrality:\n")
## Node Betweenness Centrality:
print(summary(node_betweenness))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.2708 2.0762 23.8000 12.5214 242.4333
# Closeness Centrality
node_closeness <- closeness(video.network)
cat("Node Closeness Centrality:\n")
## Node Closeness Centrality:
print(summary(node_closeness))
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.01389 0.01493 0.02083 0.03198 0.05000 0.07692 1
# Eigenvector Centrality
node_eigenvector <- evcent(video.network)$vector
cat("Node Eigenvector Centrality:\n")
## Node Eigenvector Centrality:
print(summary(node_eigenvector))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.008135 0.016008 0.134973 0.347216 0.704178 1.000000
# Edge Metrics
# Edge Betweenness
edge_betweenness <- edge.betweenness(video.network)
cat("Edge Betweenness:\n")
## Edge Betweenness:
print(summary(edge_betweenness))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 1.738 2.880 7.720 8.137 168.000
Network Density: 0.1885057
The density of the video network is approximately 0.189, indicating that
about 18.9% of all possible connections between nodes are present. This
suggests a moderately connected network where nodes are reasonably
well-connected but not overly dense.
Average Path Length: 2.271264
The average path length is around 2.27, which means that, on average,
any node can be reached from any other node in just over two steps. This
indicates a relatively compact network where information or interactions
can spread quickly.
Diameter: 4
The diameter is 4, meaning the longest shortest path between any two
nodes in the network is 4 steps. This further supports the notion of a
compact network structure.
Node Degree
The minimum degree is 1, indicating that at least one node has only one
connection. The maximum degree is 23, showing the most connected node
has 23 connections. The median and mean degrees are around 10,
suggesting a moderately connected network where most nodes have a
similar number of connections.
Node Betweenness Centrality
Betweenness centrality values vary widely, with a mean of 23.8 and a
maximum of 242.43. Nodes with high betweenness centrality are critical
for the flow of information, acting as bridges between different parts
of the network. The high maximum value suggests the presence of a few
key nodes that play a significant role in maintaining network
connectivity.
Node Closeness Centrality
Closeness centrality measures indicate how quickly a node can access
other nodes. The mean value of 0.03198 and a maximum of 0.07692 suggest
that some nodes are very well-positioned to access the entire network
quickly. One node has an NA value, which might indicate it is
disconnected or an isolated node.
Node Eigenvector Centrality
Eigenvector centrality values show the influence of nodes based on their
connections to other highly connected nodes. The mean of 0.347216 and a
maximum of 1.0 indicate the presence of highly influential nodes within
the network. These nodes are likely to be central hubs connected to
other significant nodes.
Edge Metrics
Edge betweenness values range from 1 to 168, with a mean of 7.72. High
edge betweenness indicates edges that are crucial for connecting
different parts of the network. The maximum value of 168 suggests that
some edges are very important for maintaining the shortest paths in the
network.
Summary The video network is moderately dense with a low average path length and diameter, indicating a compact structure where information can spread quickly. The degree distribution shows most nodes have a similar number of connections, while betweenness and eigenvector centralities highlight the presence of key nodes and edges that play crucial roles in network connectivity and influence. The metrics collectively suggest a well-connected network with certain nodes and edges being particularly significant for maintaining overall connectivity and efficiency.
The Walktrap algorithm is a community detection method based on random walks. It operates on the principle that short random walks tend to stay within the same community.
The Infomap algorithm is another community detection method that relies on the idea of information compression. It views the task of finding communities as finding an efficient compression of the information flow on the network.
Music
# Community Detection
# Walktrap Algorithm
walktrap_communities <- cluster_walktrap(music.network)
cat("Walktrap Algorithm:\n")
## Walktrap Algorithm:
print(membership(walktrap_communities))
## 219045 230470 239763 239764 239765 239766 239767 239768 239769 239771 239772
## 1 2 1 1 2 2 2 2 2 3 2
## 240888 240889 240890 240891 240892 240893 240894 240895 240899 251177 251179
## 3 3 3 3 3 3 3 3 3 2 2
## 251180 269387 269388 270419 287512 304504 332118 332119
## 1 1 2 2 2 3 1 1
cat("Modularity:", modularity(walktrap_communities), "\n")
## Modularity: 0.3337261
# Infomap Algorithm
infomap_communities <- cluster_infomap(music.network)
cat("Infomap Algorithm:\n")
## Infomap Algorithm:
print(membership(infomap_communities))
## 219045 230470 239763 239764 239765 239766 239767 239768 239769 239771 239772
## 1 1 1 1 1 1 1 1 1 2 1
## 240888 240889 240890 240891 240892 240893 240894 240895 240899 251177 251179
## 2 2 2 2 2 2 2 2 2 1 1
## 251180 269387 269388 270419 287512 304504 332118 332119
## 1 1 1 1 1 2 1 1
cat("Modularity:", modularity(infomap_communities), "\n")
## Modularity: 0.3301913
# Visualize communities
par(mfrow=c(1,3))
plot(walktrap_communities, music.network, main="Walktrap Algorithm", vertex.size=10, edge.arrow.size=0.05, vertex.label.cex=0.7, vertex.label.color="black")
plot(infomap_communities, music.network, main="Infomap Algorithm", vertex.size=10, edge.arrow.size=0.05, vertex.label.cex=0.7, vertex.label.color="black")
Walktrap Algorithm Communities
Detected:
Community 1: Nodes - 219045, 239763, 239764, 251180, 332118,
332119
Community 2: Nodes - 230470, 239765, 239766, 239767, 239768, 239769,
239772, 240888, 251177, 269388, 270419, 287512
Community 3: Nodes - 239771, 240889, 240890, 240891, 240892, 240893,
240894, 240895, 240899, 304504
The modularity score of 0.3337261 indicates a moderate level of community structure within the music network. This score suggests that the communities are reasonably well-defined, but there is still room for improvement in the detection of more distinct communities.
This algorithm tends to detect smaller, more tightly-knit communities, as evidenced by the division of nodes into three distinct groups with moderately high modularity.
Infomap Algorithm Communities
Detected:
Community 1: Nodes - 219045, 230470, 239763, 239764, 239765, 239766,
239767, 239768, 239769, 239772, 251179, 269387, 269388, 270419, 287512,
332118, 332119
Community 2: Nodes - 239771, 240888, 240889, 240890, 240891, 240892,
240893, 240894, 240895, 240899, 304504
The modularity score of 0.3301913 is slightly lower than that of the Walktrap algorithm, indicating a similar but slightly less pronounced community structure. This score reflects a comparable level of community definition within the music network.
This algorithm generally finds slightly larger communities, with some nodes grouped differently compared to the Walktrap algorithm. The modularity score is slightly lower, indicating a similar but less distinct community structure.
Book
# Community Detection
# Walktrap Algorithm
walktrap_communities <- cluster_walktrap(book.network)
cat("Walktrap Algorithm:\n")
## Walktrap Algorithm:
print(membership(walktrap_communities))
## 45 1036 1039 1040 1041 1615 9237 9238 9239 9240 9241
## 3 3 3 3 3 3 3 3 3 3 3
## 24689 33385 33389 33390 33392 33540 39392 41940 50667 56737 56738
## 1 1 1 1 1 1 2 2 1 1 1
## 63101 68821 78445 111338 148774 234339 245481 337176
## 1 1 2 2 1 2 2 2
cat("Modularity:", modularity(walktrap_communities), "\n")
## Modularity: 0.5908128
# Infomap Algorithm
infomap_communities <- cluster_infomap(book.network)
cat("Infomap Algorithm:\n")
## Infomap Algorithm:
print(membership(infomap_communities))
## 45 1036 1039 1040 1041 1615 9237 9238 9239 9240 9241
## 1 1 1 1 1 1 1 1 1 1 1
## 24689 33385 33389 33390 33392 33540 39392 41940 50667 56737 56738
## 2 2 3 2 3 2 4 4 3 3 3
## 63101 68821 78445 111338 148774 234339 245481 337176
## 3 3 4 4 5 5 4 4
cat("Modularity:", modularity(infomap_communities), "\n")
## Modularity: 0.5614716
# Visualize communities
par(mfrow=c(1,2))
plot(walktrap_communities, book.network, main="Walktrap Algorithm", vertex.size=10, edge.arrow.size=0.05, vertex.label.cex=0.7, vertex.label.color="black")
plot(infomap_communities, book.network, main="Infomap Algorithm", vertex.size=10, edge.arrow.size=0.05, vertex.label.cex=0.7, vertex.label.color="black")
Walktrap Algorithm
Communities Detected:
Community 1: Nodes - 24689, 33385, 33389, 33390, 33392, 33540, 56737,
56738, 63101, 68821, 148774
Community 2: Nodes - 39392, 41940, 78445, 111338, 234339, 245481,
337176
Community 3: Nodes - 45, 1036, 1039, 1040, 1041, 1615, 9237, 9238, 9239,
9240, 9241
The modularity score of 0.5908128 indicates a strong community structure. This high score suggests that the detected communities are well-defined and the division is meaningful.
This algorithm identifies three distinct communities. The high modularity score reflects that these communities are densely connected internally. Community 1 and Community 3 are tightly knit, while Community 2 spans a larger section of the network.
Infomap Algorithm
Communities Detected:
Community 1: Nodes - 45, 1036, 1039, 1040, 1041, 1615, 9237, 9238, 9239,
9240, 9241, 33385
Community 2: Nodes - 24689, 33390, 33540
Community 3: Nodes - 33389, 56737, 56738, 63101, 68821
Community 4: Nodes - 39392, 41940, 78445, 111338, 234339, 245481,
337176
Community 5: Nodes - 148774
The modularity score of 0.5614716 indicates a slightly less strong but still significant community structure compared to the Walktrap algorithm.
This algorithm identifies five communities, with more granular divisions compared to the Walktrap algorithm.
The modularity score is slightly lower, indicating that while the communities are meaningful, they are less densely connected than those identified by the Walktrap algorithm.
Video
# Community Detection
# Walktrap Algorithm
walktrap_communities <- cluster_walktrap(video.network)
cat("Walktrap Algorithm:\n")
## Walktrap Algorithm:
print(membership(walktrap_communities))
## 49943 49947 49948 59813 65817 76252 76292 86988 86989 86990 86992
## 1 1 1 1 1 1 2 2 2 2 2
## 86993 87725 90018 95697 95700 101895 108869 109489 114378 126714 149139
## 2 1 2 2 2 1 2 1 1 2 2
## 149795 151468 154328 183012 193550 229284 324495 372824
## 2 1 1 1 2 1 1 1
cat("Modularity:", modularity(walktrap_communities), "\n")
## Modularity: 0.4274242
# Infomap Algorithm
infomap_communities <- cluster_infomap(video.network)
cat("Infomap Algorithm:\n")
## Infomap Algorithm:
print(membership(infomap_communities))
## 49943 49947 49948 59813 65817 76252 76292 86988 86989 86990 86992
## 1 1 1 1 1 1 2 2 2 2 2
## 86993 87725 90018 95697 95700 101895 108869 109489 114378 126714 149139
## 2 1 2 2 2 1 2 1 1 2 2
## 149795 151468 154328 183012 193550 229284 324495 372824
## 2 1 3 1 2 1 3 3
cat("Modularity:", modularity(infomap_communities), "\n")
## Modularity: 0.425119
# Visualize communities
par(mfrow=c(1,3))
plot(walktrap_communities, video.network, main="Walktrap Algorithm", vertex.size=10, edge.arrow.size=0.05, vertex.label.cex=0.7, vertex.label.color="black")
plot(infomap_communities, video.network, main="Infomap Algorithm", vertex.size=10, edge.arrow.size=0.05, vertex.label.cex=0.7, vertex.label.color="black")
Walktrap Algorithm Communities
Detected: Community 1: Nodes - 49943, 49947, 49948, 59813,
65817, 76252, 87725, 101895, 109489, 114378, 151468, 154328, 183012,
229284, 324495, 372824
Community 2: Nodes - 76292, 86988, 86989, 86990, 86992, 86993, 90018,
95697, 95700, 108869, 126714, 149139, 149795, 193550
The modularity score of 0.4274242 indicates a moderately strong community structure. This score suggests that the detected communities are meaningful and well-defined.
This algorithm identifies two main communities, with one larger community (Community 1) and a smaller one (Community 2). The high modularity score reflects that these communities are densely connected internally.
Walktrap Algorithm detects fewer, larger communities with a higher modularity score. This suggests a strong but less detailed community structure. Suitable for understanding broader groupings within the network.
Infomap Algorithm Communities
Detected: Community 1: Nodes - 49943, 49947, 49948, 59813,
65817, 76252, 87725, 101895, 109489, 114378, 151468, 154328, 183012,
229284
Community 2: Nodes - 76292, 86988, 86989, 86990, 86992, 86993, 90018,
95697, 95700, 108869, 126714, 149139, 149795, 193550
Community 3: Nodes - 324495, 372824
The modularity score of 0.425119 is slightly lower than that of the Walktrap algorithm, indicating a similar but slightly less strong community structure.
This algorithm identifies three communities, splitting some of the nodes from the larger community in the Walktrap algorithm into a third, smaller community.
Infomap Algorithm identifies more, smaller communities with a slightly lower modularity score. This indicates a more detailed division of the network, capturing finer community structures. Useful for a more nuanced understanding of the network’s community structure.
# Create the adjacency matrix
adj_matrix <- as_adjacency_matrix(music.network, sparse = FALSE)
# Set up the plot with customizations
par(mar = c(5, 5, 4, 2) + 0.1) # Increase margins for axis labels
# Visualize the adjacency matrix with gridlines and node labels
image(1:nrow(adj_matrix), 1:ncol(adj_matrix), adj_matrix,
main = "Adjacency Matrix For Music",
xlab = "Nodes", ylab = "Nodes",
axes = FALSE, col = c("white", "black"))
# Add axis labels
axis(1, at = 1:nrow(adj_matrix), labels = rownames(adj_matrix), las = 2, cex.axis = 0.7, tick = FALSE)
axis(2, at = 1:ncol(adj_matrix), labels = colnames(adj_matrix), las = 2, cex.axis = 0.7, tick = FALSE)